meteorite_data <- read.csv("data/meteorite-landings.csv")
meteorite_data <- meteorite_data %>%
clean_names() %>%
filter(year >= 1970 & year <= 2013) %>%
select(-fall)
meteorite_data_cleaned <- meteorite_data %>%
filter(!is.na(reclat) & !is.na(reclong)) %>%
filter(!is.na(mass) & !is.na(year))
summary(meteorite_data_cleaned)
## name id nametype recclass
## Length:35901 Min. : 4 Length:35901 Length:35901
## Class :character 1st Qu.:11000 Class :character Class :character
## Mode :character Median :22209 Mode :character Mode :character
## Mean :25989
## 3rd Qu.:40608
## Max. :57458
## mass year reclat reclong
## Min. : 0 Min. :1970 Min. :-87.37 Min. :-165.43
## 1st Qu.: 6 1st Qu.:1987 1st Qu.:-76.72 1st Qu.: 0.00
## Median : 25 Median :1997 Median :-71.50 Median : 53.93
## Mean : 1483 Mean :1995 Mean :-43.60 Mean : 66.10
## 3rd Qu.: 133 3rd Qu.:2003 3rd Qu.: 0.00 3rd Qu.: 157.22
## Max. :4000000 Max. :2013 Max. : 81.17 Max. : 178.20
## geo_location
## Length:35901
## Class :character
## Mode :character
##
##
##
meteorite_sf <- st_as_sf(meteorite_data_cleaned, coords = c("reclong", "reclat"), crs = 4326, remove = FALSE)
continents <- ne_countries(scale = "medium", returnclass = "sf") %>%
group_by(continent) %>%
summarise()
meteorite_data_with_continents <- st_join(meteorite_sf, continents["continent"], join = st_intersects)
meteorite_data_with_continents <- meteorite_data_with_continents %>%
mutate(continent = replace_na(continent, "Ocean"))
print(meteorite_data_with_continents)
## Simple feature collection with 35901 features and 10 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -165.4333 ymin: -87.36667 xmax: 178.2 ymax: 81.16667
## Geodetic CRS: WGS 84
## First 10 features:
## name id nametype recclass mass year reclat reclong
## 1 Acapulco 10 Valid Acapulcoite 1914 1976 16.88333 -99.90000
## 2 Aioun el Atrouss 423 Valid Diogenite-pm 1000 1974 16.39806 -9.57028
## 3 Akyumak 433 Valid Iron, IVA 50000 1981 39.91667 42.81667
## 4 Al Zarnkh 447 Valid LL5 700 2001 13.66033 28.96000
## 5 Alby sur Chéran 458 Valid Eucrite-mmict 252 2002 45.82133 6.01533
## 6 Almahata Sitta 48915 Valid Ureilite-an 3950 2008 20.74575 32.41275
## 7 Alta'ameem 2284 Valid LL5 6000 1977 35.27333 44.21556
## 8 Anlong 2305 Valid H5 2500 1971 25.15000 105.18333
## 9 Aomori 2313 Valid L6 320 1984 40.81056 140.78556
## 10 Ash Creek 48954 Valid L6 9500 2009 31.80500 -97.01000
## geo_location continent geometry
## 1 (16.883330, -99.900000) North America POINT (-99.9 16.88333)
## 2 (16.398060, -9.570280) Africa POINT (-9.57028 16.39806)
## 3 (39.916670, 42.816670) Asia POINT (42.81667 39.91667)
## 4 (13.660330, 28.960000) Africa POINT (28.96 13.66033)
## 5 (45.821330, 6.015330) Europe POINT (6.01533 45.82133)
## 6 (20.745750, 32.412750) Africa POINT (32.41275 20.74575)
## 7 (35.273330, 44.215560) Asia POINT (44.21556 35.27333)
## 8 (25.150000, 105.183330) Asia POINT (105.1833 25.15)
## 9 (40.810560, 140.785560) Asia POINT (140.7856 40.81056)
## 10 (31.805000, -97.010000) North America POINT (-97.01 31.805)
continents <- unique(meteorite_data_with_continents %>% pull(continent))
color_palette <- setNames(brewer.pal(length(continents), "Set1"), continents)
map <- plot_ly(type = 'scattermapbox', mode = 'markers')
for (cont in continents) {
continent_data <- subset(meteorite_data_with_continents, continent == cont)
map <- map %>%
add_trace(
data = continent_data,
lat = ~reclat,
lon = ~reclong,
marker = list(size = 4, color = color_palette[cont]),
text = ~paste("Name:", name, "<br>Year:", year, "<br>Mass:", mass, "<br>Continent:", continent),
name = cont
)
}
map <- map %>%
layout(
title = 'Global Distribution of Meteorite Landings by Continent (1970-2013)',
mapbox = list(
style = "open-street-map",
zoom = 1,
center = list(lat = 0, lon = 0)
),
legend = list(title = list(text = "Continent"))
)
map
yearly_landings <- meteorite_data_with_continents %>%
st_drop_geometry() %>%
group_by(year, continent) %>%
summarise(count = n(), .groups = 'drop')
line_plot_by_continent <- plot_ly(
data = yearly_landings,
x = ~year,
y = ~count,
type = 'scatter',
mode = 'lines',
color = ~continent
) %>%
layout(
title = 'Meteorite Landings Over Time by Continent',
xaxis = list(title = 'Year'),
yaxis = list(title = 'Number of Meteorite Landings'),
legend = list(title = list(text = 'Continent'))
)
line_plot_by_continent
If the p-value from Levene’s Test is greater than 0.05, proceed with a standard ANOVA test.
If the p-value is less than 0.05, consider using Welch’s ANOVA instead, which doesn’t assume equal variances.
meteorite_data_with_continents <- meteorite_data_with_continents %>%
mutate(continent = as.factor(continent))
levene_test <- leveneTest(mass ~ continent, data = meteorite_data_with_continents)
print(levene_test)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 7 24.75 < 2.2e-16 ***
## 35893
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The result of Levene’s Test shows a p-value of \(1.85 \times 10^{-11}\), which is far below 0.05. This indicates that the variances of mass across different continents are not equal. Given this result, we should proceed with Welch’s ANOVA instead of the standard ANOVA, as Welch’s ANOVA does not assume equal variances.
Null and Alternative Hypotheses \[ H_0: \mu_{\text{Africa}} = \mu_{\text{Asia}} = \mu_{\text{Europe}} = \mu_{\text{North America}} = \mu_{\text{South America}} = \mu_{\text{Oceania}} = \mu_{\text{Antarctica}}\] \[H_1: \text{At least one continent has a different mean meteorite mass.}\] Rejection Rule: Reject the null hypothesis if the p-value is less than the significance level.
\[\text{Reject } H_0 \text{ if } p\text{-value} < \alpha = 0.05.\]
welch_anova <- oneway.test(mass ~ continent, data = meteorite_data_with_continents, var.equal = FALSE)
print(welch_anova)
##
## One-way analysis of means (not assuming equal variances)
##
## data: mass and continent
## F = 10.461, num df = 7.0, denom df = 1417.5, p-value = 7.261e-13
\[F = 9.2968, \quad \text{df}_1 = 7, \quad \text{df}_2 = 2529, \quad p\text{-value} = 2.062 \times 10^{-11}\]
\[\text{Since } p\text{-value} < 0.05, \text{ we reject } H_0.\] Conclusion: There is a statistically significant difference in mean meteorite mass across continents.
summary_table <- meteorite_data_with_continents %>%
group_by(continent) %>%
summarize(count = n()) %>%
arrange(continent)
area_data <- data.frame(
continent = c("Africa", "Antarctica", "Asia", "Europe",
"North America", "South America", "Oceania"),
area_km2 = c(29648481, 13720000, 31033131, 22134710,
21330000, 17461112, 8486460)
)
summary_table <- meteorite_data_with_continents %>%
group_by(continent) %>%
summarize(count = n()) %>%
arrange(continent)
merged_table <- merge(summary_table, area_data, by = "continent")
merged_table <- merged_table %>%
mutate(landings_per_km2 = count / area_km2)
final_table <- merged_table %>%
select(continent, count, area_km2, landings_per_km2) %>%
knitr::kable(digits = c(0, 0, 0, 6), caption = "Meteorite Landings by Continent")
final_table
| continent | count | area_km2 | landings_per_km2 | geometry |
|---|---|---|---|---|
| Africa | 2690 | 29648481 | 0.000091 | MULTIPOINT ((-14.71633 24.6… |
| Antarctica | 22075 | 13720000 | 0.001609 | MULTIPOINT ((-125 -84.75), … |
| Asia | 3139 | 31033131 | 0.000101 | MULTIPOINT ((27.32997 37.35… |
| Europe | 125 | 22134710 | 0.000006 | MULTIPOINT ((-8.28 37.60833… |
| North America | 816 | 21330000 | 0.000038 | MULTIPOINT ((-64.43333 48.8… |
| Oceania | 436 | 8486460 | 0.000051 | MULTIPOINT ((117.9667 -31.5… |
| South America | 413 | 17461112 | 0.000024 | MULTIPOINT ((-41.73356 -20…. |
Variations in Land Area: Different continents have different land areas, which may affect the probability of meteorites being found and recorded. Larger continents may have more land to catch falling meteorites, resulting in larger finds and potentially heavier meteorites being discovered.
Data Quality and Collection Bias: Differences in meteorite mass across continents may also result from data quality and collection bias. Factors such as population density, exploration frequency, geological and environmental conditions, and scientific interest can all impact the likelihood of discovering and recording meteorites.
set.seed(123)
clusters <- kmeans(meteorite_data_cleaned[, c("reclat", "reclong")], centers = 500)
cluster_centers <- data.frame(
reclat = clusters$centers[, 1],
reclong = clusters$centers[, 2]
)
set.seed(123)
predicted_locations <- data.frame(
reclat = rnorm(5000, mean = rep(cluster_centers$reclat, each = 10), sd = 1),
reclong = rnorm(5000, mean = rep(cluster_centers$reclong, each = 10), sd = 1)
)
fig <- plot_ly(
data = predicted_locations,
lat = ~reclat,
lon = ~reclong,
type = "scattergeo",
mode = "markers",
marker = list(size = 3, color = "blue", opacity = 0.7),
text = ~paste0("Lat: ", round(reclat, 2), "<br>Lon: ", round(reclong, 2))
) %>%
layout(
title = "Enhanced Predicted Meteorite Landings",
geo = list(
showland = TRUE,
showcountries = TRUE,
projection = list(type = 'natural earth')
)
)
fig